# -*- coding: utf-8 -*-
import random
import scrapy
from scrapy import Request
from datetime import datetime
from zc_core.spiders.base import BaseSpider
from zc_core.dao.catalog_dao import CatalogDao

from ctg.rules import *
from zc_core.dao.sku_pool_dao import SkuPoolDao
from zc_core.dao.batch_dao import BatchDao
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.done_filter import DoneFilter
from ctg.utils.login import *
from ctg.dao.sku_dao import *
from scrapy.utils.project import get_project_settings


class FullSpider(BaseSpider):
    """Full-catalog crawl spider.

    For the current batch, loads every SKU belonging to a level-3 catalog
    under the whitelisted level-1 catalogs, then yields one detail-page
    request per SKU that is neither retired (offline too long) nor already
    collected in this batch.
    """
    name = 'full'

    # Item detail endpoint (cookie-free); '{}' is filled with the SKU/ware id.
    item_url = 'https://ego.ctg.com.cn/mall-basedoc/ware?wareId={}&mall_type=ORDINARY'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialise the spider for one batch run.

        Args:
            batchNo: optional batch identifier forwarded to ``BaseSpider``,
                which is assumed to derive ``self.batch_no`` from it.
        """
        super(FullSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Register this batch run.
        BatchDao().create_batch(self.batch_no)
        # Tracks SKU ids already collected in this batch, to avoid re-crawling.
        self.done_filter = DoneFilter(self.batch_no)

    def start_requests(self):
        """Yield one item-detail :class:`scrapy.Request` per eligible SKU."""
        # Restrict the crawl to level-3 catalogs under the whitelisted
        # level-1 catalogs from settings.
        cat1_filters = list(self.settings.get('CAT1_WHITE_LIST', {}).keys())
        cat3_list = CatalogDao().get_cat3_list_by_cat1(batch_no=self.batch_no, cat1_list=cat1_filters)
        cat_filter = [x.get('_id') for x in cat3_list]
        pool_list = SkuDao().get_batch_sku_list(
            batch_no=self.batch_no,
            fields={"_id": 1, "offlineTime": 1, 'originPrice': 1, 'salePrice': 1},
            query={"catalog3Id": {"$in": cat_filter}}
        )
        self.logger.info('全量：%s' % (len(pool_list)))
        # Randomise crawl order to spread load across catalogs.
        random.shuffle(pool_list)
        # Hoisted out of the loop: project settings are loop-invariant
        # (the original re-read them on every iteration).
        settings = get_project_settings()
        max_offline_time = settings.get('MAX_OFFLINE_TIME', 2)
        force_recover = settings.get('FORCE_RECOVER', False)
        for sku in pool_list:
            sku_id = sku.get('_id')
            offline_time = sku.get('offlineTime', 0)
            # Skip SKUs that have been seen offline too many times.
            if offline_time > max_offline_time:
                self.logger.info('忽略: [%s][%s]', sku_id, offline_time)
                continue
            # Skip SKUs already collected in this batch, unless forced.
            if self.done_filter.contains(sku_id) and not force_recover:
                self.logger.info('已采: [%s]', sku_id)
                continue
            # Crawl the item detail page.
            yield Request(
                url=self.item_url.format(sku_id),
                headers={
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36 Edg/90.0.818.46',
                    'X-Requested-With': 'XMLHttpRequest'
                },
                meta={
                    'reqType': 'item',
                    'batchNo': self.batch_no,
                    'skuId': sku_id,
                    'originPrice': sku.get('originPrice'),
                    'salePrice': sku.get('salePrice')
                },
                callback=self.parse_item_data,
                errback=self.error_back,
            )

    def parse_item_data(self, response):
        """Parse an item-detail response; yield parsed data or log it as offline.

        NOTE(review): the ``parse_item_data(response)`` call below is NOT
        recursion — inside a method body the bare name resolves to the
        module-level helper brought in by the star imports at the top of
        the file (class attributes are not in a method's lookup scope).
        The identical names are a readability hazard but are preserved
        here because renaming the method could break external references.
        """
        meta = response.meta
        sku_id = meta.get('skuId')

        data = parse_item_data(response)
        if data:
            self.logger.info('商品: [%s]' % data.get('skuId'))
            yield data
        else:
            self.logger.info('下架: sku=%s' % sku_id)
